* ***************************************************************** *
* ***************************************************************** *
*   File-Name:      data.do                                       *
*   Date:           June 6, 2016                                   *
*   Author:         TG, Shukova                                     *
*   Purpose:      	Recoding of aggregated survey data              *
*   Input File:     aggregate.dta                                   *
*   Output File:    recode.dta                                       *
* ****************************************************************  *
* ****************************************************************  *


* Session setup: pin the interpreter version, close any log that is still
* open (ignoring the error if none is), empty memory, and switch off the
* -more- pause so the do-file runs without interaction.
version 14.2
capture log close
clear
set more off


**********************************************
* Specify your own local path to the dropbox *
**********************************************


* Thomas G.'s local (office) path:
* local path /Users/gschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/


* Thomas G.'s local (Air) path:
* local path /Users/thomasgschwend/Dropbox/Andre/OUP multilevel electoral behavior book/chapter 6/replication/
 
 



* Load the aggregated survey data.
use aggregate, clear

* Consistency check 0: the first two like/dislike items must reproduce the
* recorded number of observations and sum, and lie on the 0-10 scale.
* Each local holds "<expected N> <expected sum>" for one variable.
local expect_Q17_party1 "21025 113260"
local expect_Q17_party2 "20980 103707"

foreach q in Q17_party1 Q17_party2 {
	local nobs  : word 1 of `expect_`q''
	local total : word 2 of `expect_`q''
	summarize `q'
	return list
	assert r(N) == `nobs' & r(min) == 0 & r(max) == 10 & r(sum) == `total'
}

* for OUP book we look at three countries: France, Germany and Spain


* Get a compact form of the codebook, a combination of the information you get
* from summarize & describe

*codebook, compact



/*

* generate election-specific id
gen eid =.
replace eid = 111 if ELECID==5  //France, National, IDF (Paris)
replace eid = 112 if ELECID==6  //France, National, Provence
replace eid = 101 if ELECID==22 //France, local, IDF (Paris)
replace eid = 102 if ELECID==23 //France, local, Provence (Marseille)
replace eid = 131 if ELECID==17 //France, Europe, IDF (Paris)
replace eid = 132 if ELECID==16 //France, Europe, Provence

replace eid = 211 if ELECID==10 //Germany, National, Lower Sax
replace eid = 212 if ELECID==12  //Germany, National, Bavaria
replace eid = 201 if ELECID==11 //Germany, regional, Lower Sax
replace eid = 202 if ELECID==13 //Germany, regional, Bavaria
replace eid = 231 if ELECID==18 //Germany, Europe, Lower Sax
replace eid = 232 if ELECID==19 //Germany, Europe, Bavaria

replace eid = 311 if ELECID==7  //Spain, National, Catalonia
replace eid = 312 if ELECID==9  //Spain, National, Madrid
replace eid = 301 if ELECID==8  //Spain, regional, Catalonia
replace eid = 302 if ELECID==24  //Spain, regional, Madrid
replace eid = 331 if ELECID==20 //Spain, Europe, Catalonia
replace eid = 332 if ELECID==21 //Spain, Europe, Madrid


*label define eid 111 "France, national, IDF" 112 "France, National, Provence"
*label value eid eid
*/

**************************************************************************
*************			  PARTY PREFERENCES  				 *************
**************************************************************************


*preserve 						

* Party stubs shared by the like/dislike items Q17_party1 ... Q17_party9.
local part "party1 party2 party3 party4 party5 party6 party7 party8 party9"

* Build the list of rating variables and take the row-wise maximum, i.e. the
* highest score a respondent gave to any party.
local ratings
foreach p of local part {
	local ratings `ratings' Q17_`p'
}
egen maxrow = rowmax(`ratings')

* For every party: store its distance to the respondent's top score, then
* rename the rating to y<party> so that it can serve as a reshape stub later.
local dists
foreach p of local part {
	generate max`p' = maxrow - Q17_`p'
	label variable max`p' "Distance of `p' to most preferred party"
	rename Q17_`p' y`p'
	label variable y`p' "Like/Dislike of party `p'"
	local dists `dists' max`p'
}

* Respondent sequence number (row position in the loaded data); used as the
* reshape identifier below.
generate int seqn = _n

* Number of parties whose distance to the top score is zero, i.e. how many
* parties share the respondent's highest rating.
egen ties = anycount(`dists'), values(0)

* avoid that missings get dropped -> assign neg value
*mvencode maxparty*, mv(.=-1) /* get rid of missing values */

* Consistency check 1: seqn must run from 1 to 21230 without gaps.
summarize seqn
return list
assert r(N) == 21230 & r(min) == 1 & r(max) == 21230 & r(sum) == 225367065



* Go long: one row per respondent (seqn) and party. The rating is held in y,
* the party stub ("party1" ... "party9") in vname1.
reshape long y, i(seqn) j(vname1) string

* Parties a respondent did not rate cannot be ranked.
drop if y == .

* Rank the parties within respondent from least to most liked.
* Without this sort the rows stay in (seqn, vname1) order, so y[_N] below
* would return the rating of the alphabetically last rated party rather than
* the highest rating. A plain -sort seqn y- is not replicable because Stata
* orders tied observations randomly; adding vname1 as the final key makes the
* order (and therefore the tie-break) deterministic: among equally rated
* parties the one with the highest party stub is ranked first.
sort seqn y vname1


* Check data consistency 2
sum  seqn
return list
assert r(N)== 149333  & r(min)==1 & r(max)==21230 & r(sum) ==1653364855


* 9 is max number of parties.
* win_pref`i'  = score of the i-th most preferred party,
* name_pref`i' = its party stub; both are constant within respondent.
* Respondents with fewer than i rated parties get missing / "".
forvalues i = 1(1)9 {
	by seqn: gen win_pref`i' = y[_N-(`i'-1)]
	label var win_pref`i' "like/dislike Score of `i'. preferred party"
	by seqn: gen str name_pref`i'= vname1[_N-(`i'-1)]
}


* Check data consistency 3
* Structural check: the top-ranked score must equal the row maximum computed
* before the reshape. (The former hard-coded r(sum) check encoded the values
* produced by the unsorted data and no longer applies.)
assert win_pref1 == maxrow

sum  win_pref1
return list
assert r(N)== 149333 & r(min)>=0 & r(max)<=10

sum  seqn
return list
assert r(N)== 149333  & r(min)==1 & r(max)==21230 & r(sum) ==1653364855


* win_pref`i' holds the score and name_pref`i' the name of the i-th most
* popular party; go back to one row per respondent.
reshape wide y, i(seqn) j(vname1) string
*drop seqn y*


* Check data consistency 4
assert win_pref1 == maxrow

sum  win_pref1
return list
assert r(N)==  21079 & r(min)>=0 & r(max)<=10

* Every one of the nine party stubs is expected to occur as a first preference.
encode name_pref1, gen(test1)

* Check data consistency
sum  test1
return list
assert r(N)==  21079  & r(min)==1 & r(max)==9
drop test1






* Tie indicators on the preference ranking.
* TiePartyPref12 = 1 when the first- and second-ranked scores are equal
* (two missing scores also compare as equal), 0 otherwise.
generate TiePartyPref12 = (win_pref1 == win_pref2)

* TiePartyPref13 / TiePartyPref14 extend the tie to the third / fourth rank:
* 1 only if the previous tie indicator is 1 and the next score equals the top
* score; missing wherever the previous indicator is missing.
forvalues k = 3/4 {
	local prev = `k' - 1
	generate TiePartyPref1`k' = (win_pref1 == win_pref`k' & TiePartyPref1`prev' == 1) if TiePartyPref1`prev' != .
}




* Q8 Vote Intention (pre) 

* Vote intention: copy Q8A and set every code from 88 upwards ("other party"
* and above) to missing.
generate Q8 = Q8A
recode Q8 (88/max = .)
label variable Q8 "Vote intention"

* One 0/1 indicator per party number; left missing where Q8 is system missing.
forvalues p = 1/9 {
	generate vote`p' = (Q8 == `p') if Q8 != .
	label variable vote`p' "Vote intention for party `p'"
}



* Generate partyNum numeric variable indicating most (and 2nd most) preferred party


* Numeric party number of the most and 2nd most preferred party: strip the
* "party" prefix from the stored stub and convert the remainder to a number.
local ranklabel1 "most preferred party"
local ranklabel2 "2nd most preferred party"

forvalues k = 1/2 {
	generate prefPartyNum`k' = regexr(name_pref`k',"party","")
	destring prefPartyNum`k', replace
	label variable prefPartyNum`k' "`ranklabel`k''"
}

* Consistency check: every respondent has a first preference among parties 1-9.
summarize prefPartyNum1
return list
assert r(N) == 21079 & r(min) == 1 & r(max) == 9

* Same conversion for all nine ranks: pref`r' = party number at rank r.
forvalues r = 1/9 {
	generate pref`r' = regexr(name_pref`r',"party","")
	destring pref`r', replace
	label variable pref`r' "Number of  `r'. preferred party"
}




/*
save partyPref, replace
restore

sort RESPID
merge 1:1 RESPID ELECID  using partyPref
*/



* Replace the string variable COUNTRY by a labelled numeric variable of the
* same name: encode into a temporary variable, drop the string, rename back.
* (The value label created by -encode- keeps the name "country".)
encode COUNTRY, gen(country)
drop COUNTRY
ren country COUNTRY
* Expected coding: 1 = France, 2 = Germany, 3 = Spain. -encode- numbers the
* distinct strings in alphabetical order, so this presumes COUNTRY contains
* exactly these three names -- TODO confirm against the data.

* Generate dummy that indicates whether a respondent voted for her preferred party

* define strategic if you vote for party that is not clearly most preferred
* votes for party that is tied on first preference are non sincere (strategic)!!!!

* nonsincere: 1 = vote intention differs from the clearly most preferred
* party, 0 = vote intention equals it. With a tie on the first rank, a vote
* for the top-ranked party is coded 1; all other tied cases stay missing.
*
* Every rule requires a non-missing vote intention. In Stata a missing value
* is "not equal" to any number, so without the guard every respondent whose
* Q8 is missing would satisfy prefPartyNum1 != Q8 and be counted as a
* non-sincere voter.
gen nonsincere = .
replace nonsincere = 1 if prefPartyNum1 != Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere = 0 if prefPartyNum1 == Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere = 1 if prefPartyNum1 == Q8 & prefPartyNum2 != Q8 & TiePartyPref12==1 & !missing(Q8)

*tab  eid nonsincere, row

* nonsincere1: same as nonsincere without a tie; with a tie on the first
* rank, a vote for the second-ranked (tied) party is coded sincere (0) and
* all other tied cases stay missing.
gen nonsincere1 = .
replace nonsincere1 = 1 if prefPartyNum1 != Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere1 = 0 if prefPartyNum1 == Q8 & TiePartyPref12==0 & !missing(Q8)
replace nonsincere1 = 0 if prefPartyNum1 != Q8 & prefPartyNum2 == Q8 & TiePartyPref12==1 & !missing(Q8)

*tab  eid nonsincere1, row



*tab  eid sincere, row



**************************************************
** Code the DVs: Two Forms of Strategic Voting  **
**************************************************


** construct variable that indicates wether most preferred party (or 2nd most) 
** wins a seat in district or not!


*create id to merge in party information for most-preferred parties
* Lookup keys for the party-level files: election id * 100 + party number of
* the most (idMP1) and 2nd most (idMP2) preferred party.
generate idMP1 = eid*100 + prefPartyNum1
generate idMP2 = eid*100 + prefPartyNum2

*exit
*save test, replace
** cf _all using test, all
*exit

*merge id using parties.dta : id* = eid*100 + partynumber

/*
Result recorded for the idMP1 merge:
    not matched                            28
        from master                         1  (_merge==1)
        from using                         27  (_merge==2)

    matched                            21,078  (_merge==3)
    -----------------------------------------
*/

local largelabel1 "most preferred party is a large party"
local largelabel2 "2nd most preferred party is a large party"

* Attach the party-level information once per preference rank. caseid fixes
* the row order within key before the merge; rows that exist only in the
* party file are discarded afterwards.
forvalues k = 1/2 {
	generate caseid = _n
	sort idMP`k' caseid
	merge m:1 idMP`k' using "parties_mergeidMP`k'.dta"
	label variable large_partyMP`k' "`largelabel`k''"
	tabulate _merge
	drop if _merge == 2
	drop _merge caseid
}




*********************************************************************************************
*** Indicator for wasted vote: Is most preferred party viable to win a seat?
* Non-viability flags for the most preferred party: 1 when the merged seat
* count (current / previous election) is zero.
generate seat_current0MP1  = (seat_currentMP1 == 0)
generate seat_previous0MP1 = (seat_previousMP1 == 0)

label variable seat_current0MP1 "most preferred party not viable (currently no seat)"
label variable seat_previous0MP1 "most preferred party not viable (previously no seat)"

* District-level overrides (same as in the country-specific do-files).
* Within each listed election the flag is rebuilt from scratch: 1 for the
* party/district combinations listed, 0 for everything else.
* NOTE(review): partynumber and district are not created in this file;
* presumably they come from the merged party file / the survey data -- confirm.

**** SPAIN
* based on district-level results

* Catalonia Regional Election (eid 301), current
replace seat_current0MP1 = ( inlist(partynumber, 7, 8)                  ///
	| (partynumber == 9 & district != 1)                               ///
	| (partynumber == 6 & inlist(district, 3, 4)) ) if eid == 301

* Catalonia Regional Election (eid 301), previous
replace seat_previous0MP1 = ( inlist(partynumber, 8, 9)                 ///
	| (partynumber == 7 & inlist(district, 2, 3))                      ///
	| (partynumber == 6 & district != 1)                               ///
	| (partynumber == 3 & district == 3) ) if eid == 301

* Catalonia National Election (eid 311), current
replace seat_current0MP1 = ( inlist(partynumber, 6, 7)                  ///
	| (partynumber == 5 & inlist(district, 2, 3))                      ///
	| (partynumber == 3 & district != 1) ) if eid == 311

* Catalonia National Election (eid 311), previous
replace seat_previous0MP1 = ( inlist(partynumber, 6, 7)                 ///
	| (partynumber == 5 & inlist(district, 2, 3))                      ///
	| (partynumber == 3 & inlist(district, 2, 4))                      ///
	| (partynumber == 2 & district == 4) ) if eid == 311


*** France

**eid=111
* --- eid = 111 -------------------------------------------------------------
* Attach the district-level party codes (party1-party3) from partycodes.dta.
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

* Which districts are not covered? Paris 14 was won in 1st round. Not in sample.
tabulate district_name if _merge == 1
keep if _merge != 2
drop _merge caseid

* notwastedMP1 = 1 if the most preferred party is one of the district's
* party1-party3, 0 if not; defined only for eid 111 and only where a first
* preference exists.
generate notwastedMP1 = (prefPartyNum1 == party1 | prefPartyNum1 == party2 | prefPartyNum1 == party3) ///
	if prefPartyNum1 != . & eid == 111
drop party1 party2 party3 departementcode departementname

* --- eid = 112 -------------------------------------------------------------
* The same lookup file is merged a second time; the merged party codes are
* not used below, only the merge report is inspected.
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge
tabulate district_name if _merge == 1

/* Districts won in first round
					party1		party2	party3
ALPES MARITIMES	6	SOC1		UMP2	FN3
ALPES MARITIMES	7	UMP2		FN3
*/

keep if _merge != 2
drop _merge caseid

* Hand-coded parties for the two districts listed above; restricted to
* eid 112 so that it only applies to those districts.
generate notwasted1MP1 = 1 if eid == 112 & (                                          ///
	(inlist(prefPartyNum1, 1, 2, 3) & district_name == "ALPES MARITIMES 6") |     ///
	(inlist(prefPartyNum1, 2, 3)    & district_name == "ALPES MARITIMES 7") )

replace notwastedMP1 = 1 if notwasted1MP1 == 1

drop party1 party2 party3 departementcode departementname

* Apply the district-level result (France, national): a most preferred party
* that is not among the listed parties is flagged as not viable.
replace seat_current0MP1 = 1 if notwastedMP1 == 0

*assume for France that previous is the same as current*
*replace seat_current0MP1 = 1 if notwastedMP1==0
*********************************************************************************************




*********************************************************************************************
*** Indicator for wasted vote: Is most preferred party viable to win a seat?
* Non-viability flags for the 2nd most preferred party: 1 when the merged
* seat count (current / previous election) is zero.
gen seat_current0MP2 = (seat_currentMP2==0)
gen seat_previous0MP2 = (seat_previousMP2==0)

* Labels name the 2nd most preferred party (they previously repeated the
* wording of the MP1 variables).
label var seat_current0MP2 "2nd most preferred party not viable (currently no seat)"
label var seat_previous0MP2 "2nd most preferred party not viable (previously no seat)"

* get district-level recodes in (same as in country specific do-files)
*
* The rules are keyed on prefPartyNum2, the party number of the 2nd most
* preferred party. The unsuffixed variable partynumber is also what the MP1
* section keys on, so it cannot identify the second preference as well.
* NOTE(review): district is not created in this file; presumably it is a
* respondent-level variable from the survey data -- confirm.

**** SPAIN
* based-on district-level results
replace seat_current0MP2=0 if eid==301 /*Catalonia Regional Election */
replace seat_current0MP2=1 if eid==301 & prefPartyNum2==9 & district!=1
replace seat_current0MP2=1 if eid==301 & prefPartyNum2==8
replace seat_current0MP2=1 if eid==301 & prefPartyNum2==7
replace seat_current0MP2=1 if eid==301 & prefPartyNum2==6 & (district==4 | district==3)

replace seat_previous0MP2=0 if eid==301 /*Catalonia Regional Election */
replace seat_previous0MP2=1 if eid==301 & prefPartyNum2==9
replace seat_previous0MP2=1 if eid==301 & prefPartyNum2==8
replace seat_previous0MP2=1 if eid==301 & prefPartyNum2==7 & (district==2 | district==3)
replace seat_previous0MP2=1 if eid==301 & prefPartyNum2==6 & (district!=1)
replace seat_previous0MP2=1 if eid==301 & prefPartyNum2==3 & (district==3)


replace seat_current0MP2=0 if eid==311 /*Catalonia National Election */
replace seat_current0MP2=1 if eid==311 & prefPartyNum2==7
replace seat_current0MP2=1 if eid==311 & prefPartyNum2==6
replace seat_current0MP2=1 if eid==311 & prefPartyNum2==5 & (district==2 | district==3)
replace seat_current0MP2=1 if eid==311 & prefPartyNum2==3 & district!=1

replace seat_previous0MP2=0 if eid==311 /*Catalonia National Election */
replace seat_previous0MP2=1 if eid==311 & prefPartyNum2==7
replace seat_previous0MP2=1 if eid==311 & prefPartyNum2==6
replace seat_previous0MP2=1 if eid==311 & prefPartyNum2==5 & (district==2 | district==3)
replace seat_previous0MP2=1 if eid==311 & prefPartyNum2==3 & (district==2 | district==4)
replace seat_previous0MP2=1 if eid==311 & prefPartyNum2==2 & (district==4)


*** France

**eid=111
* --- eid = 111 -------------------------------------------------------------
* Attach the district-level party codes (party1-party3) from partycodes.dta.
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge

* Which districts are not covered? Paris 14 was won in 1st round. Not in sample.
tabulate district_name if _merge == 1
keep if _merge != 2
drop _merge caseid

* notwastedMP2 = 1 if the 2nd most preferred party is one of the district's
* party1-party3, 0 if not; defined only for eid 111 and only where a second
* preference exists.
generate notwastedMP2 = (prefPartyNum2 == party1 | prefPartyNum2 == party2 | prefPartyNum2 == party3) ///
	if prefPartyNum2 != . & eid == 111
drop party1 party2 party3 departementcode departementname

* --- eid = 112 -------------------------------------------------------------
* The same lookup file is merged a second time; the merged party codes are
* not used below, only the merge report is inspected.
generate caseid = _n
sort district_name caseid
merge m:1 district_name using "partycodes.dta"
tabulate _merge
tabulate district_name if _merge == 1

/* Districts won in first round
					party1		party2	party3
ALPES MARITIMES	6	SOC1		UMP2	FN3
ALPES MARITIMES	7	UMP2		FN3
*/

keep if _merge != 2
drop _merge caseid

* Hand-coded parties for the two districts listed above; restricted to
* eid 112 so that it only applies to those districts.
generate notwasted1MP2 = 1 if eid == 112 & (                                          ///
	(inlist(prefPartyNum2, 1, 2, 3) & district_name == "ALPES MARITIMES 6") |     ///
	(inlist(prefPartyNum2, 2, 3)    & district_name == "ALPES MARITIMES 7") )

replace notwastedMP2 = 1 if notwasted1MP2 == 1

drop party1 party2 party3 departementcode departementname

* Apply the district-level result (France, national): a 2nd most preferred
* party that is not among the listed parties is flagged as not viable.
replace seat_current0MP2 = 1 if notwastedMP2 == 0

*assume for France that previous is the same as current*
*replace seat_current0MP1 = 1 if notwastedMP1==0
*********************************************************************************************
* Shrink storage types where possible and write the recoded data set.
compress
save recode, replace

* Final consistency check (replicates with the same recode.dta): the
* respondents kept must reproduce the recorded count and sum of seqn.
summarize seqn
return list
assert r(N) == 21079 & r(min) == 1 & r(max) == 21230 & r(sum) == 224103453




*exit
*save test, replace
** cf _all using test, all
*exit


/* Old code







/*
A voter is coded as if she follows the wasted-vote strategy if she 
(1) does not cast her vote for the most preferred party, 
(2) her most-preferred party is not expected to be viable, i.e. is 
    not expected to win at least one seat in the voter’s electoral district and, 
(3) she votes for a party instead that is expected to gain representation
*/


exit












forvalues v = 1(1)9 {
	   gen nonsincere_`v' =.
	   replace nonsincere_`v' = 1 if prefPartyNum1 != `v' & vote`v'==1 
	   replace nonsincere_`v' = 0 if prefPartyNum1 == `v' & vote`v'==1
	   replace nonsincere_`v' = 1 if prefPartyNum1 == `v' & vote`v'==1 & prefPartyNum2 != `v' & TiePartyPref12==1
	   replace nonsincere_`v' = 0 if prefPartyNum1 != `v' & vote`v'==1 & prefPartyNum2 == `v' & TiePartyPref12==1
       label var nonsincere_`v' "Non-sincere (=1) vote for party `v'"
	   }





gen level_new = 1 if LEVEL=="National"
replace level_new = 2 if LEVEL=="Regional"
replace level_new = 3 if LEVEL=="European"


collapse (mean) sincere nonsincere*  level_new COUNTRY, by(eid)


replace nonsincere  = nonsincere*100
replace nonsincere1 = nonsincere1*100
replace sincere = sincere*100

label define level_new 2 "Regional" 1 "National" 3 "European"
label value level_new level_new

gen national = (level_new==1)
gen regional = (level_new==2)



label define COUNTRY 1 "France" 2 "Germany" 3 "Spain"
label value COUNTRY COUNTRY

gen Germany = (COUNTRY==2)
gen Spain   = (COUNTRY==3)
gen France  = (COUNTRY==1)



save party, replace 

use party, clear

#delimit ;
graph box nonsincere, over(level_new)
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
		graphregion(fcolor(white)) 
		ylabel(20(10)50, labsize(small) angle(horizontal) nogrid) yscale(nofextend) 
		plotregion(lcolor(white) margin(l=4))
saving(box1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export box1.pdf, replace


*graph box nonsincere1, over(level_new)

#delimit ;
twoway scatter nonsincere  COUNTRY, mlabel(level_new)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Non-Sincere Voting")              // incl. axis titles
	ylabel(20(10)50, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(0 " " 1 "France" 2 "Germany" 3 "Spain" 4" ", notick)                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(graph1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export graph1.pdf, replace

reg nonsincere national regional 
eststo est0
reg nonsincere national regional Germany Spain
eststo est1
reg nonsincere1 national regional
eststo est2
reg nonsincere1 national regional Germany Spain
eststo est3


#delimit ;
esttab est0 est1 est2 est3 using regional_level.rtf, replace b(%9.2f) se(%9.2f)  rtf  noobs star(* 0.10 ** 0.05)
title ("Table X.1: Non-Sincere Voting across Electoral Arenas")
varlabels(_cons "Constant" nonsincere1 "Non-Sincere" national "National" regional "Regional"
          Germany "Germany" Spain "Spain" )
stats(r2) sfmt(%9.0f)  sca("r2 R-squared") nonotes
addnote ("N = `e(N)'. Standard errors in parentheses. * p < 0.10, ** p < 0.05")
order(national regional Germany Spain)
;
#delimit cr


************ Sincere Voting


#delimit ;
graph box sincere, over(level_new)
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
		graphregion(fcolor(white)) 
		ylabel(70(10)100, labsize(small) angle(horizontal) nogrid) yscale(nofextend) 
		plotregion(lcolor(white) margin(l=4))
saving(boxsincere, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export boxsincere.pdf, replace





#delimit ;
twoway scatter sincere  COUNTRY, mlabel(eid)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Sincere Voting")              // incl. axis titles
	ylabel(70(10)100, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(0 " " 1 "France" 2 "Germany" 3 "Spain" 4 " ", noticks)                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(scatter_sincere1, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export scatter_sincere1.pdf, replace




#delimit ;
twoway scatter sincere  COUNTRY, mlabel(level_new)
	legend(off)        // get rid of legend
	scheme(s1mono)     // use white background to save ink graphregion(color(white))
	xtitle(" ", color(black) alignment(bottom )) xscale(noline)  
	ytitle("Share of Sincere Voting")              // incl. axis titles
	ylabel(40(10)80, labsize(small) angle(horizontal)) yscale(nofextend)  // use smaler & readable labels
	xlabel(2 "France" 4 "Germany" 3 "Spain" 5 " ")                                    // use less labels
		graphregion(fcolor(white)) 
	plotregion(lcolor(white) margin(l=4))	/* get rid of box (give it the same color as the background) except the axes & increase left margin*/
saving(scatter_sincere, replace);                                 // - , replace - to let the graph be replaced by a new one
#delimit cr	
graph export scatter_sincere.pdf, replace


reg sincere national regional 
eststo est0
reg sincere national regional Germany Spain
eststo est1



#delimit ;
esttab est0 est1  using regional_levelsincere.rtf, replace b(%9.2f) se(%9.2f)  rtf  noobs star(* 0.10 ** 0.05)
title ("Table X.2: Sincere Voting across Electoral Arenas")
varlabels(_cons "Constant" sincere "Sincere Voting" national "National" regional "Regional"
          Germany "Germany" Spain "Spain" )
stats(r2) sfmt(%9.0f)  sca("r2 R-squared") nonotes
addnote ("N = `e(N)'. Standard errors in parentheses. * p < 0.10, ** p < 0.05")
order(national regional Germany Spain)
;
#delimit cr


save eid, replace

restore


*/


